 
clear

set matsize 11000
set more off
	
********************* Determine the recentralization reform year *******************

	* Load the city-level fiscal panel (one row per city_id and year).
	use ../source/city_fiscal.dta, clear

	* Exclude the four province-level municipalities (pro_id 11, 12, 31, 50).
	drop if pro_id == 11 | pro_id == 12 | pro_id == 31 | pro_id == 50

	* Drop unused variables.
	* The explanation sits on its own comment line on purpose: a triple-slash
	* marker after the varlist is a line-continuation, not an end-of-line
	* comment, and would glue the following line onto this drop command.
	drop data_source notes Coder
	
	* Declare the panel. xtset also leaves the data sorted by city_id year,
	* which the by-prefixed egen below relies on, and enables the L. and F.
	* operators used later when coding the reform status.
	xtset city_id year  // balanced: 333 cities, 9 years each

	* Translate the province reform wave (1, 2, 3) into a calendar year.
	recode pro_wave (1=2014)(2=2015)(3=2016), gen(pr_year)

	* Generate court expenditure in the baseline year:
	*   court_base2 holds exp_court only on the row where year equals pr_year;
	*   the city-level min() copies that single value to every row of the city;
	*   rows before pr_year are then blanked again.
	gen court_base2 = exp_court if pr_year == year
	by city_id: egen court_base = min(court_base2)
	replace court_base = . if year < pr_year
	drop court_base2    

	* Code the judicial reform year
	*
	* For every threshold i in 0, 10, ..., 100 this loop builds a temporary 0/1
	* indicator reform_`i' and reduces it to one city-level variable
	* reform_`i'_year (earliest year flagged as reform, or 9999 as a sentinel).
	* The replace statements are order-dependent: each step only fills rows
	* that are still missing after the previous one, and Step 3.4 finally
	* overrides everything before the provincial reform year.
	
	forvalues i = 0(10)100{
		
		* Step 1: Fiscal spending on courts is at or below i% of the baseline year.
		*         Defined only where both exp_court and court_base are observed.
		
		gen reform_`i' = (exp_court <= court_base * `i'/100) if !mi(exp_court) & !mi(court_base)

		* Step 2: Code those cases where the expenditure is already a very small amount as reform.
		* 
		* 	Note (original authors): the small amount is `i' (unit: 10,000 Yuan) when the baseline
		* 	spending is larger than 10 million Yuan, i.e. spending on courts is below i/1000 of
		* 	the baseline (between 0 when i == 0 and 10% when i == 100). They report that results
		* 	are similar if exp_court must instead be smaller than a fixed value (e.g., 10).
		*
		* 	NOTE(review): after Step 1, reform_`i' is missing only when exp_court or court_base
		* 	is missing, and a missing exp_court fails the test exp_court <= `i'. This line can
		* 	therefore only change rows where court_base is missing; Stata ranks missing above
		* 	every number, so court_base > 1000 is true on exactly those rows. Confirm that
		* 	this is the intended behavior before relying on the description above.
		
		replace reform_`i' = 1 if court_base > 1000 & exp_court <= `i' & reform_`i' == . 
		
		* Step 3: Extrapolate the reform status for missing values (i.e., reform_`i' == .)
		*         L. and F. are the lag and lead operators of the panel declared with xtset.
			
			* Step 3.1: Previous two years are coded as reform, this year is also coded as reform.
			
			replace reform_`i' = 1 if L.reform_`i' == 1 & L2.reform_`i' == 1 & reform_`i' == .  
			
			* Step 3.2: Code as reform if the previous year and the next year are both coded as reform.
			
			replace reform_`i' = 1 if L.reform_`i' == 1 & F.reform_`i' == 1 & reform_`i' == .   
			
			* Step 3.3: As a special case of 3.2, for 2019 (end year) there is no next year,
			*           so the previous year alone decides.
			
			replace reform_`i' = 1 if L.reform_`i' == 1 & year == 2019 & reform_`i' == .  
			
			* Step 3.4: Code reform as 0 for all years before the province announces reform.
			*           This runs last and overwrites any value set above for those years.
			
			replace reform_`i' = 0 if year < pr_year  
				
		* Step 4: Determine the earliest year that the city starts the reform.
		*         Rows flagged as reform contribute their year; a 2019 row coded 0 contributes
		*         the sentinel 9999, so a city with no reform row and a 0 in 2019 ends up with
		*         9999. A city with neither stays missing.
		
		gen reform_`i'_year2 = year if reform_`i' == 1
		replace reform_`i'_year2 = 9999 if year == 2019 & reform_`i' == 0  
		by city_id: egen reform_`i'_year = min(reform_`i'_year2)
		drop reform_`i'_year2 
		drop reform_`i'
	}

*   Reform: continuous measurement
	* court_base is blank before the provincial reform year; the city-level
	* minimum puts the baseline value back on every row of the city.
	by city_id: egen base_allyears = min(court_base)
	
	* Share of baseline court spending still paid, capped at 1, then flipped
	* so that larger values mean a larger cut. Missing ratios stay missing.
	gen reform_cont = exp_court/base_allyears
	replace reform_cont = 1 if reform_cont > 1 & reform_cont < .
	replace reform_cont = 1 - reform_cont
	
*	Save the dataset
	* Only the keys, the threshold-specific reform years, the continuous
	* measure and the province wave survive; all helper variables are dropped.
	keep city_id year reform_*_year reform_cont pro_wave
	* Shorten city_id by two digits so it lines up with the key of court_exp.dta.
	replace city_id = city_id/100
	merge 1:1 city_id year using ../source/court_exp.dta, nogenerate
	save ../data/city_cutoffs.dta, replace

* 	Save a separate file to be merged for the placebo test
	* Same rows, reduced to the 50% threshold and renamed with a placebo
	* prefix so it can be merged on the other litigant's city later on.
	keep city_id year reform_50_year merge_highcourt pro_wave
	rename pro_wave pl_pro_wave
	rename merge_highcourt placebo_merge_highcourt
	rename reform_50_year placebo_reform_50_year
	rename city_id placebo_city_id
	save ../data/city_cutoffs_placebo.dta, replace

*********************** Build lawsuit case dataset ******************************

	use ../source/firm_case.dta, clear

	* Province reform information, keyed on pro_code.
	* All rows are kept here, including provinces without any case.
	merge m:1 pro_code using ../source/reform_pro_fund.dta, nogenerate

	* City-level reform years from the cutoff file. That file is keyed on
	* city_id and year, so trial_year is temporarily exposed under the name
	* year. City-years without a case are not added.
	rename trial_year year
	merge m:1 city_id year using ../data/city_cutoffs.dta, keep(master match) nogenerate
	
	* Calendar year of the acceptance date
	gen accept_year = year(accept_date)

	* Basic courts in the province-rank municipalities (pro_code 11, 12, 31, 50)
	* take the provincial reform year court_2 at every threshold
	forvalues cut = 0(10)100 {
		replace reform_`cut'_year = court_2 if inlist(pro_code, 11, 12, 31, 50)
	}

	* Restore the original name of the trial year
	rename year trial_year

	* Determine local firms
	* firm`x'_local is 1 when the firm's city equals the court's city, 0 when
	* it differs or when the firm's city string is "999"; it stays missing
	* when the comparison cannot be made. Courts with city_id 999 are skipped.
	forvalues x=1/2 {
		gen firm`x'_local = 1 if firm`x'_city_id == city_id & !missing(firm`x'_city_id) & !missing(city_id) & city_id !=999
		replace firm`x'_local = 0 if firm`x'_city_id != city_id & !missing(firm`x'_city_id) & !missing(city_id) & city_id !=999
		replace firm`x'_local = 0 if firm`x'_city == "999" & !missing(city_id) & city_id !=999		
	}

	* Exactly one of the two litigants is local. A missing flag on either
	* side makes the sum missing and therefore yields 0.
	gen any_local = (firm1_local + firm2_local == 1)

	* Determine if the non-local firm is from another province.
	* The indicator is defined only for cases with exactly one local firm, so
	* the update carries the same any_local guard as the initial assignment;
	* without it every other row was overwritten with 1.
	* NOTE(review): a missing firm1_pro_id or firm2_pro_id still compares as
	* different from pro_code and is coded 1. Confirm this is intended.
	gen other_pro = 0 if any_local == 1
	replace other_pro = 1 if any_local == 1 & (firm2_pro_id != pro_code | firm1_pro_id != pro_code) 
	
	* Determine if the local firm is the plaintiff
	* plaintiff == 1 pairs with firm 1 and plaintiff == 0 with firm 2.
	gen local_plaintiff = 0 
	replace local_plaintiff = 1 if ((plaintiff == 1 & firm1_local == 1) | (plaintiff == 0 & firm2_local == 1))

	* Four types of cases involving listed firms and/or local firms,
	* built from which firm is local and whether the local firm is the plaintiff
	gen local_listed = (firm1_local==1 & local_plaintiff == 1) 
	gen local_nonlisted = (firm2_local==1 & local_plaintiff == 1) 
	gen nonlocal_listed = (firm2_local==1 & local_plaintiff == 0) 
	gen nonlocal_nonlisted = (firm2_local==0 & local_plaintiff == 0) 

	* Recode provinces' reform year, due to missing values in municipalities.
	* The merged pro_wave is discarded and rebuilt from pro_code; the three
	* code lists do not overlap.
	drop pro_wave
	gen pro_wave = 2014 if inlist(pro_code, 22, 31, 42, 44, 46, 63)
	replace pro_wave = 2015 if inlist(pro_code, 14, 15, 23, 32, 33, 34, 35, 37, 50, 52, 53, 64)
	replace pro_wave = 2016 if inlist(pro_code, 11, 12, 13, 21, 36, 41, 43, 45, 51, 54, 61, 62, 65)
	
	* Indicator for cases tried in or after the provincial reform year court_2.
	* Defined whenever trial_year is observed; a missing court_2 ranks above
	* every year and therefore yields 0.
	gen centralize_pro = (court_2 <= trial_year) if !missing(trial_year)
		
		* Overrides to 1: intermediate courts (court_level 2) of province-level
		* municipalities, and courts whose budget is integrated into the
		* provincial government (merge_highcourt)
		replace centralize_pro = 1 if (court_level == 2 & municipal == 1) | merge_highcourt == 1
	
	* City treatment: the court's reform status with varying fiscal thresholds.
	* Order matters: threshold comparison first, then the overrides to 1,
	* then the reset to 0 for trials not later than the province wave year.
	forvalues cut = 0(10)100 {
		* 1 if the city's reform year is not later than the trial year, else 0;
		* missing when either year is missing
		gen centralize_city`cut' = (reform_`cut'_year <= trial_year) if !missing(trial_year) & !missing(reform_`cut'_year)
		* same two overrides as for the province indicator
		replace centralize_city`cut' = 1 if (court_level == 2 & municipal == 1) | merge_highcourt == 1
		* never treated up to and including the wave year, for ordinary cities
		* and for basic courts (court_level 1) of the municipalities
		replace centralize_city`cut' = 0 if trial_year <= pro_wave & (municipal == 0 | (municipal == 1 & court_level == 1))
	}

	* Placebo treatment: identify the reform year of the other litigant's city.
	* The placebo city is firm 2's city when firm 1 is local, otherwise
	* firm 1's city when firm 2 is local; missing when neither is local.
	gen placebo_city_id = cond(firm1_local == 1, firm2_city_id, firm1_city_id) if firm1_local == 1 | firm2_local == 1

	* The placebo file is keyed on year, so trial_year is renamed for the
	* merge. The municipalities were excluded when that file was built.
	rename trial_year year
	merge m:1 placebo_city_id year using ../data/city_cutoffs_placebo.dta, keep(master match) nogenerate
	rename year trial_year

	* Wave number (1, 2, 3) to calendar year
	recode pl_pro_wave (1=2014)(2=2015)(3=2016)
  
	* Shorthand for "the placebo city is one of the four municipalities"
	local muni inlist(placebo_city_id, 1101, 1201, 3101, 5001)

	* Threshold comparison at 50%; missing when either year is missing
	gen pl_centralize50 = (placebo_reform_50_year <= trial_year) if !missing(trial_year) & !missing(placebo_reform_50_year)
	* Budget merged with the high court counts as treated
	replace pl_centralize50 = 1 if placebo_merge_highcourt == 1   
	* Not treated up to and including the wave year: every non-municipal
	* placebo city, and municipal ones when the court is a basic court
	replace pl_centralize50 = 0 if trial_year <= pl_pro_wave & !mi(pl_pro_wave) & (!`muni' | court_level == 1)
	* Municipal placebo cities: basic courts are treated from 2017 on,
	* intermediate courts always
	replace pl_centralize50 = 1 if `muni' & ((court_level == 1 & trial_year >= 2017) | court_level == 2)
   
	* Outcome variables
	* win takes the codes 0, 1, 2 and 9; any other value leaves both bounds
	* missing. Lower bound: only code 2 is a win. Upper bound: everything
	* except code 0 is a win.
	gen win_low = (win == 2) if inlist(win, 0, 1, 2, 9)
	gen win_up = (win != 0) if inlist(win, 0, 1, 2, 9)

	* Local firm wins.
	* The win codes are combined with firm 1 directly and inverted for firm 2;
	* each bound pairs with the opposite bound on the firm 2 side.
	* Both variables are missing when win_low (resp. win_up) or either
	* local flag is missing.
	gen local_win_low = ((win_low == 1 & firm1_local == 1) | (win_up == 0 & firm2_local == 1)) if !mi(win_low, firm1_local, firm2_local)
	gen local_win_up = ((win_up == 1 & firm1_local == 1) | (win_low == 0 & firm2_local == 1)) if !mi(win_up, firm1_local, firm2_local)

	* Alternative coding: local firm wins the case (3 classes).
	* 2 = win under both bounds, 1 = win under the upper bound only,
	* 0 = win under neither. The combination low 1 with up 0 stays missing.
	gen local_win_final = local_win_low + local_win_up if local_win_low <= local_win_up
	
	* Lawsuit reconciled: fold code 2 into 1
	replace reconcile = 1 if reconcile == 2	

	* Case enforcement, same lower and upper bound logic as for win
	gen enforce_1y_low = (enforce_1y == 2) if inlist(enforce_1y, 0, 1, 2, 9)
	gen enforce_1y_up = (enforce_1y != 0) if inlist(enforce_1y, 0, 1, 2, 9)
	
	* Enforcement quality uses the codes 0, 1 and 2 only
	gen enforce_quality_low = (enforce_quality == 2) if inlist(enforce_quality, 0, 1, 2)
	gen enforce_quality_up = (enforce_quality != 0) if inlist(enforce_quality, 0, 1, 2)
	
	drop enforce_1y enforce_quality

	* Sample: trials in 2012--2018 that involve exactly one local firm
	keep if inrange(trial_year, 2012, 2018)
	drop if any_local == 0

	* Province/city controls, measured one year before the trial.
	* year is only a temporary merge key and is removed afterwards.
	capture drop year
	gen year = trial_year - 1
	merge m:1 city_id year using ../source/city_pro_panel.dta, keep(master match) nogenerate
	drop year

	* Listed firm (firm 1) characteristics, again one year before the trial.
	* The merge key is the string firm name, an underscore, and the year.
	* NOTE(review): the guard below removes id_year although the key created
	* here is name_year; kept as in the original, confirm which was meant.
	capture drop id_year
	gen pre_year = trial_year - 1 if trial_year < .
	tostring pre_year, replace
	gen name_year = firm_name + "_" + pre_year if firm_name !="." & pre_year != "."
	merge m:1 name_year using ../source/Listed_Firms_V2.dta, keep(master match) nogenerate

	* Listed firm connection data, keyed on the first six characters of
	* firm_code and the trial year itself
	gen symbol = substr(firm_code,1,6)
	gen yearend = trial_year
	merge m:1 symbol yearend using ../source/Firm_Connection.dta, keep(master match) nogenerate

	* Firms without a connection record count as not connected
	foreach v in firm_pc pc_low pc_city {
		replace `v' = 0 if `v' == .
	}
	* pc_low set without pc_city
	gen pc_county = (pc_low == 1 & pc_city == 0)
	* firm_pc set with neither pc_county nor pc_city
	gen firm_nlpc = (firm_pc == 1 & pc_county == 0 & pc_city == 0)

	* Rescaling control variables
	* Unit labels in this section are the original authors' annotations and
	* cannot be checked from this script. Statement order matters wherever
	* population_hr_c or pro_pop is used as a denominator before being
	* rescaled itself.

	* Firm-level ratios and amounts
	gen stake_bi = stake_10k / 100000
	gen pr_govsub = (gov_subsidy / trading_revenue)*100
	gen pr_tax = (turnover_tax / trading_revenue)*100
	gen reg_cap_bi = reg_capital / 1000000 // billion
	foreach v in net_cash_flow net_profit {
		replace `v' = `v' / 1000000 // billion
	}

	* City-level controls
	replace city_total_ipo=0 if city_total_ipo==.
	foreach v in tax_vadded_c investment_fixed_c {
		replace `v' = `v' / 100000   // billion
	}
	foreach v in publicfinance_income_c publicfinance_expenditure_c {
		replace `v' = (`v') / population_hr_c  // per-capita(thousand)
	}
	replace population_unemployed_c = population_unemployed_c / (population_hr_c*100)  // unemployment rate 
	* population is rescaled only after it has served as denominator above
	replace population_hr_c = population_hr_c/100    // million

	* Province-level controls
	replace pro_pop = pro_pop/100   // million
	foreach v in pro_gdp pro_rev pro_exp rev_vat {
		replace `v' = `v'/10 // billion
	}
	foreach s in gdp rev exp {
		gen pro_`s'_percapita = pro_`s'/(pro_pop/1000)  // Yuan
	}

	* Manufacturing sector & real estate
	* NOTE(review): the group numbers below are assigned by egen from the
	* industry values present in the data at this point, so they presumably
	* shift if the sample or the industry coding changes. Verify before reuse.
	egen csrc_ind = group(industry)
	gen manufacture = inlist(csrc_ind, 1, 2, 5, 9, 13, 14, 15, 20, 31, 40, 47, 58, 61, 65, 71)
	gen real_estate = (csrc_ind == 26)

	* Firm characteristics and FEs
	gen lemploy = ln(employee_num + 1)  // firm size
	* establishment year is the first four characters of reg_date; pre_year
	* is converted back to numeric together with it
	gen firm_est_year = substr(reg_date,1,4)
	destring firm_est_year pre_year, replace
	gen firm_age = pre_year - firm_est_year  // firm age
	egen firm_id = group(firm_name)  // firm FEs

	* Province-year identifier: province code in the leading digits,
	* trial year in the last four
	gen pro_year_fe = pro_code*10000 + trial_year

	* Drop unused variables: identifiers, merge keys, placebo helpers and
	* raw inputs that are not kept in the final case file
	drop firm_code firm_name industry any_local
	drop placebo_city_id pl_pro_wave placebo_merge_highcourt 
	drop placebo_reform_50_year csrc_ind pc_low court_2
	drop exp_npc exp_ps exp_edu exp_rd exp_env exp_culture exp_welfare
	drop mean_roa mean_alr name_year pre_year yearend symbol
	drop income_total exp_total exp_salary exp_trial exp_enforce  
	drop firm_est_year reg_capital reg_date pro_exp pro_gdp pro_rev
	drop stake_10k trading_revenue
	drop win_low turnover_tax gov_subsidy

	save ../data/case_data.dta, replace

	
	
*********************** Build fiscal dataset ******************************

	clear
	set matsize 11000
	set more off

	* Start from the city-year reform cutoffs built in the first section
	use ../data/city_cutoffs.dta

	*** merging city expenditure data
	merge 1:1 city_id year using ../source/city_exp.dta
	drop _merge
	
	*** merging court expenditure data
	* NOTE(review): city_cutoffs.dta was already merged with this file when it
	* was saved, so this step looks redundant; kept unchanged.
	merge 1:1 city_id year using ../source/court_exp.dta
	drop _merge 
	
	*** merging city and provincial characteristics
	* city-years that exist only in the characteristics panel are not added
	merge 1:1 city_id year using ../source/city_pro_panel.dta
	drop if _merge==2
	drop _merge
	
	*** dealing with missing values
	* Carry the province code forward from the previous year of the SAME
	* city. The fill is done within city_id so that a city whose first row
	* lacks pro_code no longer inherits the code of the preceding city in
	* the sort order. bysort also leaves the data sorted by city_id year.
	bysort city_id (year): replace pro_code = pro_code[_n-1] if pro_code==.
	replace city_total_ipo=0 if city_total_ipo==. & year <=2018
	capture drop centralize_city*

	* Code the treatment variable at the 50% threshold.
	* The statements are applied in this order: threshold comparison, the
	* 9999 sentinel (never reformed), then the high-court budget override.
	gen centralize_city50 = 1 if reform_50_year <= year
	replace centralize_city50 = 0 if reform_50_year > year & !missing(reform_50_year)
	replace centralize_city50 = 0 if reform_50_year == 9999
	replace centralize_city50 = 1 if merge_highcourt == 1 
	
	* Recode the reform year for cities whose court budget merges with the
	* high court. Note: the reform year should be the first year in which
	* the merged budget is observed. The listing prints the city-years that
	* are treated through the override while their coded reform year is later;
	* the hard-coded corrections follow.
	list city_id if centralize_city50 == 1 & merge_highcourt == 1 & reform_50_year > year
	replace reform_50_year = 2017 if city_id == 1503
	replace reform_50_year = 2015 if inlist(city_id, 4201, 4202, 4203, 4205, 4206, 4207, 4208, 4209, 4210, 4211, 4213)
	replace reform_50_year = 2018 if inlist(city_id, 4303, 4306, 4309)
	
	* Year in which the province is asked by Beijing to start the reform.
	* The stored pro_wave is discarded and rebuilt from pro_code; the three
	* code lists do not overlap.
	drop pro_wave
	gen pro_wave = 2014 if inlist(pro_code, 22, 31, 42, 44, 46, 63)
	replace pro_wave = 2015 if inlist(pro_code, 14, 15, 23, 32, 33, 34, 35, 37, 50, 52, 53, 64)
	replace pro_wave = 2016 if inlist(pro_code, 11, 12, 13, 21, 36, 41, 43, 45, 51, 54, 61, 62, 65)

	*** continuous measure: how much of the court expenditure comes from the city government
	* negative ratios are treated as invalid and blanked
	gen exp_ratio = exp_court / court_exp_total
	replace exp_ratio = . if exp_ratio < 0

	*** dummy measure: whether more than half of the court expenditure comes from the city government
	gen exp_ratio50 = (exp_ratio > 0.5) if exp_ratio < .
	* province-year identifier: province code in the leading digits, year in the last four
	gen pro_year = pro_code*10000 + year

	* Rescaling outcome and covariates
	* Unit labels are the original authors' annotations and cannot be checked
	* from this script. The order of the statements matters: the log
	* covariates are built from the raw values before those are rescaled.
	foreach v in court_exp_total income_total exp_enforce exp_trial exp_welfare exp_salary exp_total {
		replace `v' = `v'/100   // million
	}
	gen other_bene = exp_welfare - exp_salary  // benefits: welfare cost excluding the basic salary

	* Log covariates
	gen ln_gdp_percapita_c = ln(gdp_percapita_c + 1)
	foreach v in tax_vadded_c investment_fixed_c {
		gen ln_`v' = ln(`v' / 100 +1)
	}
	gen ln_per_publicfinance_income_c = ln(publicfinance_income_c / population_hr_c  +1) // percapita
	gen ln_per_publicfinance_exp_c = ln(publicfinance_expenditure_c / population_hr_c  +1) // percapita
	* NOTE(review): despite its name this variable is a plain ratio, no log is taken
	gen ln_unemployment_c = population_unemployed_c / (population_hr_c*100) 
	gen ln_population_hr_c = ln(population_hr_c/100 +1) 
	* built before the remaining missing IPO counts are zeroed on the next line
	gen ln_city_total_ipo = ln(city_total_ipo +1)

	* Levels
	replace city_total_ipo=0 if city_total_ipo==.
	replace tax_vadded_c = tax_vadded_c / 100000   // billion
	replace investment_fixed_c = investment_fixed_c / 100000000   // trillion
	replace gdp_percapita_c = gdp_percapita_c / 1000    // thousand
	foreach v in publicfinance_income_c publicfinance_expenditure_c {
		replace `v' = (`v'/1000) / population_hr_c  // per-capita(thousand)
	}
	replace population_unemployed_c = population_unemployed_c/10000 // ten thousands
	replace population_unemployed_c = (population_unemployed_c / (population_hr_c))*100  // unemployment rate 
	* population is rescaled only after it has served as denominator above
	replace population_hr_c = population_hr_c/100    // million
	replace pro_pop = pro_pop/100   // million
	foreach v in pro_gdp pro_rev pro_exp rev_vat {
		replace `v' = `v'/10 // billion
	}
	foreach s in gdp rev exp {
		gen pro_`s'_percapita = pro_`s'/(pro_pop/1000)  // Yuan
	}

	* Log court budget items, taken after the division by 100 above
	foreach item in income_total  exp_total  exp_salary other_bene  exp_enforce exp_trial {
		gen ln_`item' = ln(`item' + 1) 
	}

	* Log of one plus each city spending item
	foreach item in npc ps edu rd culture {
		gen lexp_`item' = ln(exp_`item' + 1)
	}
	* exp_env is multiplied by 10000 before the log, unlike the other items
	gen lexp_env = ln(exp_env*10000 + 1)

	* Drop unused variables: every threshold except 50%, then the raw inputs
	* whose logged or rescaled versions are kept
	drop reform_0_year reform_10_year reform_20_year reform_30_year reform_40_year
	drop reform_100_year reform_90_year reform_80_year reform_70_year reform_60_year
	drop high_expbase mean_roa mean_alr pro_gdp pro_rev pro_exp 
	drop exp_npc exp_ps exp_edu exp_rd exp_env exp_culture exp_welfare
	drop income_total exp_total exp_salary exp_trial exp_enforce other_bene 
	drop type 

	save ../data/fiscal_dependence.dta, replace 
	
*********************** Build political connection dataset **************************
	* Reshape the official-level city panel to one row per city and year,
	* with one set of columns per office (suffix 0 = secretary, 1 = mayor).
	clear
	use ../source/city_panel
	* Shorten city_id by two digits, as done for the fiscal panel
	replace city_id=city_id/100 if !mi(city_id)

	* Gender dummy: 1 for "男" (male), 0 for "女" (female), missing otherwise
	gen sex=1 if c_sex=="男"
	replace sex=0 if c_sex=="女"
	* Office indicator, used as the reshape suffix:
	* 0 for "书记" (party secretary), 1 for "市长" (mayor), missing otherwise
	gen mayor_sec=0 if cityps=="书记"
	replace mayor_sec=1 if cityps=="市长"
	* Official characteristics to carry into the wide file
	global cont connection_home connection_college connection_work connection_prom c_edu age sex c_tenure c_firstyearml c_central_exp
	keep $cont mayor_sec city_id year
	* Rows without a complete reshape key cannot be placed and are removed
	drop if city_id==.|year==.|mayor_sec==.
	reshape wide $cont, i(city_id year) j(mayor_sec) // from long to wide

	* Variable labels (spelling corrected; these are stored in the saved file)
	label var c_central_exp0 "central government experience secretary"
	label var c_central_exp1 "central government experience mayor" 
	label var c_edu0 "education secretary"
	label var c_edu1 "education mayor"
	label var c_firstyearml0 "first year in office secretary"
	label var c_firstyearml1 "first year in office mayor"
	label var c_tenure0 "tenure length secretary"
	label var c_tenure1 "tenure length mayor"
	label var connection_college0 "connection alumni secretary"
	label var connection_college1 "connection alumni mayor"
	label var connection_home0 "connection hometown secretary"
	label var connection_home1 "connection hometown mayor"
	label var connection_prom0 "connection promotion secretary"
	label var connection_prom1 "connection promotion mayor"
	label var connection_work0 "connection workplace secretary"
	label var connection_work1 "connection workplace mayor" 
	label var sex0 "gender secretary"
	label var sex1 "gender mayor"
	label var city_id  "city id"
	label var age0 "age secretary" 
	label var age1 "age mayor"
	
	save ../data/connection,replace

